<--- %%NOBANNER%% --> Flp2means.sas
 BackForward

/*-------------------<-- Start of Description -->--------------------\
| Nonparametric Analysis of 2-Sample means with different variances: |
| Compute p-value and confidence interval of two sample location     |
| differences;                                                       |
|--------------------<-- End of Description -->----------------------|
|--------------------------------------------------------------------|
|--------------<-- Start of Files or Arguments Needed -->------------|
|  The following parameters are needed for the macro:                |
|     indata  = Input dataset;                                       |
|     class   = Grouping variable (with values 1, 2);                |
|     trtgrp  = Treatment group Indicator: a value to represent      |
|               treatment;                                           |
|     AVAR    = Analysis variable;                                   |
|     ALPHA   = 100*(1-ALPHA)% level for the two-sided confidence    |
|               interval; default is 0.05;                           |
|     tail    = two-sided or one sided confidence interval; default  |
|               is 2;                                                |
|     better  = The smaller the better or The bigger the better;     |
|               Superiority Test or Non-Inferiority Test;            |
|               default is "The bigger the better";                  |
|     tiesadj = Ties Adjustment; Default is True.                    |
|     outdata = output data set;                                     |
|---------------<-- End of Files or Arguments Needed -->-------------|
|--------------------------------------------------------------------|
|----------------<-- Start of Example and Usage -->------------------|
| Example:                                                           |
|   data hw_ex2;                                                     |
|      input grp :$20. val;                                          |
|   cards;                                                           |
|   HealthGreese 297                                                 |
|   HealthGreese 340                                                 |
|   HealthGreese 325                                                 |
|   HealthGreese 227                                                 |
|   HealthGreese 277                                                 |
|   HealthGreese 337                                                 |
|   HealthGreese 250                                                 |
|   HealthGreese 290                                                 |
|   Lead-Poisoned 293                                                |
|   Lead-Poisoned 291                                                |
|   Lead-Poisoned 289                                                |
|   Lead-Poisoned 430                                                |
|   Lead-Poisoned 510                                                |
|   Lead-Poisoned 353                                                |
|   Lead-Poisoned 318                                                |
|   ;                                                                |
|   run;                                                             |
|   %Flp2mean(indata=hw_ex2, class=grp, avar=val, tail=1,alpha=0.08, |
|             trtgrp='Lead-Poisoned', outdata=test);                 |
| Usage: %Flp2mean(indata=, class=, trtgrp= avar=, alpha=0.05,       |
|                   tail=2, outdata=);                               |
| Reference: Hollander M and Wolfe DA, "Nonparmetric Statistical     |
|            Methods," pp ??-??.                                     |
\-------------------<-- End of Example and Usage -->----------------*/
%macro Flp2mean(indata=, class=, trtgrp=, avar=, alpha=0.05, tail=2,
                better=BIG, tiesadj=T,outdata=);
/*--------------------------------------------\
| Author:  Duo Zhou;                          |
| Created: 2-17-2002 6:30pm;                  |
| Purpose: Nonparametric Analysis for 2-sample|
|          means with Different Variances;    |
\--------------------------------------------*/
%local _grplist _dummy _grpn _i_ _j_ _grpi _varchk _varlen _qpdiff _minimumn _tmplast_;
%let _tmplast_=&syslast;
%if (%index(&trtgrp,%str(%'))) or (%index(&trtgrp,%str(%")))%then %do;
   %let trtgrp=%sysfunc(dequote(&trtgrp));
%end;
proc sql noprint;
   select count(distinct &class) into: _grpn
   from &indata
   where &class is not missing;
quit;
%if &_grpn < 2 and &_grpn >=1 %then %do;
   %put ==> Alert! In order to compare 2-Sample location problem with unequal variances,;
   %put +++        you must provide me with a group variable of 2 groups: &_grpn groups;
   %put +++        is not enough for comparison.;
   %goto finish;
%end;
%else %if &_grpn > 2 %then %do;
   %put ==> Alert! I can only compare Two-Sample location problem with Unequal Variances,;
   %put +++        you must provide me with a group variable of 2 groups: I don%str(%')t ;
   %put +++        know how to deal with &_grpn groups.;
   %goto finish;
%end;
%let _minimumn=0;
proc sql noprint;
   select min(totobs) into: _minimumn
   from (select count(*) as totobs
         from &indata
         group by &class);
quit;
%if &_minimumn<10 %then %do;
   %put ==> Note: Sample size is extremely small, do not trust the P-value in the output, which was;
   %put +++       calculated using large sample approximation. In order by get a relatively accurate;
   %put +++       p-value, please compare the calculated statistics provided in the output with the;
   %put +++       table given in book "Nonparmetric Statistical Methods" (Hollander M and Wolfe DA).;
%end;
%else %if &_minimumn<30 %then %do;
   %put ==> Note: Sample size is relatively small, P-value given in the output is calcualted using;
   %put +++       large sample approximation, so use it carefully. But you can manually compare the;
   %put +++       calculated statistics provided in the output with the table given in book;
   %put +++       "Nonparmetric Statistical Methods" (Hollander M and Wolfe DA).;
%end;
%else %do;
   %put ===> Note: the nonparametric analysis is carried out using large sample approprimation, so;
   %put +++        be careful about the P-value given in the output.;
%end;
proc ttest data=&indata;
   var &avar;
   class &class;
   title1 'T-Test of Means between Group 1 - Group 2';
run;
proc npar1way wilcoxon data=&indata;
   var &avar;
   class &class;
   title1 'Wilcoxon Rank-Sum Test of Means between Group 1 - Group 2';
run;
title;

/* Start of Fligner-Policello Procedure*/
proc rank data=&indata out=_tmpr1;
var &avar;
ranks _ranka;
run;
proc contents data=_tmpr1 out=_vchecktmp noprint;
run;
proc sql noprint;
   select type, length into: _varchk, :_varlen
   from _vchecktmp
   where name="&class";
quit;
%if (%quote(&trtgrp) eq) %then %do;
   proc sql noprint;
      select distinct &class into: _grplist separated by ','
      from &indata
      where &class is not missing;
   quit;
%end;
%else %do;
   %let _trtchk=;
   proc sql noprint;
      select &class into: _trtchk
      from &indata
      /* &trtgrp refer to a value, but &class is referring to a data variable; */
      %if (%quote(&_varchk) eq %quote(1)) %then %do;
      where &class = &trtgrp
      %end;
      %else %do;
      where upcase(&class)="%upcase(&trtgrp)"
      %end;;
   quit;
   /* Check the class variable to see if it has a treatment group, otherwise error; */
   %if (%quote(&_trtchk) ne) %then %do;
      /* The treatment group will be the 2nd group, the control group will be the 1st; */
      proc sql noprint;
         select distinct &class,
                %if (%quote(&_varchk) eq %quote(1)) %then %do; (1+(&class=&trtgrp)) as _cgrp %end;
                %else %do; (1+(upcase(&class)="%upcase(&trtgrp)")) as _cgrp %end;
                into: _grplist separated by ',',
                    : _dummy
         from _tmpr1
         where &class is not missing
         order by _cgrp;
      quit;
   %end;
   %else %do;
      %put ==> Alert! The class variable &class does not have a treatment group as you specified: &trtgrp;
      %put +++        Sorry, I can%str(%')t do any further analysis;
      %goto finish;
   %end;
%end;
%do _i_=1 %to &_grpn;
   %let _grp&_i_=%trim(%left(%qscan(%quote(&_grplist), &_i_, %str(,))));
%end;
%if (%index(%quote(%upcase(&tiesadj)),T)) or (%quote(&tiesadj)=1) %then %do;
proc sql;
   create table _ties1 as
   select h1.&class, h1.&avar, h1._ranka, h2._ntie2
   from _tmpr1 as h1 left join
   (select distinct &class, &avar, _ranka, count(*) as _ntie2
    from _tmpr1
    where &class="&_grp1" and
          _ranka in (select distinct _ranka
                     from _tmpr1
                     where &class="&_grp2")
    group by _ranka) as h2
    on h1.&class ne h2.&class and h1._ranka=h2._ranka;

   create table _tmpr1 as
   select h1.&class, h1.&avar, h1._ranka, h1._ntie2, h2._ntie1
   from _ties1 as h1 left join
   (select distinct &class, &avar, _ranka, count(*) as _ntie1
    from _ties1
    where &class="&_grp2" and
          _ranka in (select distinct _ranka
                     from _ties1
                     where &class="&_grp1")
    group by _ranka) as h2
    on h1.&class ne h2.&class and h1._ranka=h2._ranka
    order by h1._ranka, h1.&class;
quit;
%end;
%else %do;
   proc sort data=_tmpr1;
   by _ranka &class;
   run;
%end;
data _tmp1(keep=&class &avar _ranka %do _i_=1 %to &_grpn; _p&_i_ %end;)
      %do _i_=1 %to &_grpn;
         _grptmp&_i_(keep=&class &avar _ranka _p&_i_)
      %end;;
   set _tmpr1;
   by _ranka &class;
   retain %do _i_=1 %to &_grpn; _x&_i_ _tmpx&_i_ _p&_i_ %end; 0;
   %do _j_=1 %to &_grpn;
      %let _jc_=%eval(%eval(&_grpn-&_j_)+1);
      %if &_varchk=1 %then %do;
      if &class=&&_grp&_j_ then do;
      %end;
      %else %do;
      if &class="&&_grp&_j_" then do;
      %end;
      %if (%index(%quote(%upcase(&tiesadj)),T)) or (%quote(&tiesadj)=1) %then %do;
      _tmpx&_j_=_tmpx&_j_+1;
      if first.&class then do;
         if _ntie&_j_ ne . then do;
            _x&_jc_=_x&_jc_+_ntie&_j_/2;
         end;
         else do;
            _x&_j_=_tmpx&_j_;
            _x&_jc_=_tmpx&_jc_;
         end;
      end;
      %end;
      %else %do;
         _x&_j_=_x&_j_+1;
      %end;
         _p&_j_=_x&_jc_;
         _p&_jc_=0;
         output _grptmp&_j_;
      end;
      label _p&_j_="Number of Sample %upcase(&&_grp&_jc_) Observations Less Then %upcase(&&_grp&_j_)";
   %end;
   output _tmp1;
run;
%do _i_=1 %to &_grpn;
proc means data=_grptmp&_i_ noprint ;
   class &class;
   var _p&_i_;
   output out=_tmp1out&_i_ N=_n sum=_sum mean=_mean std=_std;
run;
%end;
proc sql noprint;
   create table _tmp1out as
   select t1.&class as _group1,
          (t1._n-1)*t1._std*t1._std as _v1,
          (t1._n*t1._mean) as _psum,
          (t1._mean) as _pbar,
          t2.&class as _group2,
          (t2._n-1)*t2._std*t2._std as _v2,
          (t2._n*t2._mean) as _qsum,
          (t2._mean) as _qbar
   from _tmp1out1 as t1, _tmp1out2 as t2
   where t1.&class is not missing and t2.&class is not missing;

   select (t2._n*t2._mean) - (t1._n*t1._mean) into: _qpdiff
   from _tmp1out1 as t1, _tmp1out2 as t2
   where t1.&class is not missing and t2.&class is not missing;
quit;
data _tmp2out;
   set _tmp1out;
   file print;
   _uhead=(_qsum-_psum)/(2*sqrt(_v1+_v2+_pbar*_qbar));
   put /   +5 'Hepothesis Test For Two-Sample Location Differences With Unequal Variances using'
       /   +5 'large sample approximation and Ties Adjustment with Fligner-Policello Procedure'
       /   +5 '(Reference:  Hollander M and Wolfe DA,"Nonparametric Statistical Methods," pp ??-??)';
   /* 2, q - the Treatment group */
   %if (%quote(&tail) eq %quote(2)) or (%index(%quote(%upcase(&tail)),TWO)) %then %do;
      _za=probit(1-&alpha/2);
      _pvalue=(1-probnorm(abs(_uhead)))*2;
      _lcl=(_qsum-_psum)-_za*2*sqrt(_v1+_v2+_pbar*_qbar);
      _ucl=(_qsum-_psum)+_za*2*sqrt(_v1+_v2+_pbar*_qbar);
      put / +5   %if &_varchk=1 %then %do; "Two Sided Test: H0: P(Group &_grp2-&_grp1>0)=1/2 vs"
                                    / +5   "                H1: P(Group &_grp2-&_grp1>0)<>1/2;" %end;
                 %else %do;                "Two Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)=1/2 vs"
                                    / +5   "                H1: P(Group '&_grp2'-'&_grp1'>0)<>1/2;" %end;
      put / +10 "The Fligner-Policello Statistics = " _uhead;
      put / +10 "%sysevalf((1-&alpha)*100)% confidence bound" @42 "= (" _lcl +(-1) ", " +(-1) _ucl ")";
      format _lcl _ucl 10.6; keep _lcl _ucl;
      label _lcl="Lower Confidence Bound" _ucl="Upper Confidence Bound";
   %end;
   %else %if (%quote(&tail) eq %quote(1)) or (%index(%upcase(&tail),ONE)) %then %do;
      _za=probit(1-&alpha);
      %if (%index(%quote(%upcase(&better)), BIG)) %then %do;
      _pvalue=(1-probnorm(_uhead));
      _lcl=(_qsum-_psum)-_za*2*sqrt(_v1+_v2+_pbar*_qbar);
      put / +5   %if &_varchk=1 %then %do; "One Sided Test: H0: P(Group &_grp2-&_grp1>0)<=1/2 vs"
                                    / +5   "                H1: P(Group &_grp2-&_grp1>0)> 1/2;"; %end;
                 %else %do;                "One Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)<=1/2 vs"
                                    / +5   "                H1: P(Group '&_grp2'-'&_grp1'>0)> 1/2;"; %end;
      put / +10 "The Fligner-Policello Statistics = " _uhead;
      put / +10 "%sysevalf((1-&alpha)*100)% confidence bound" @44 "= (" _lcl +(-1) ", +Inf)";
      format _lcl 10.6; keep _lcl; label _lcl="Lower Confidence Bound";
      %end;
      %else %if (%index(%quote(%upcase(&better)), SMALL)) %then %do;
      _pvalue=probnorm(_uhead);
      _ucl=(_qsum-_psum)+_za*2*sqrt(_v1+_v2+_pbar*_qbar);
      put / +5   %if &_varchk=1 %then %do; "One Sided Test: H0: P(Group &_grp2-&_grp1>0)>=1/2 vs"
                                    / +5   "                H1: P(Group &_grp2-&_grp1>0)< 1/2;"; %end;
                 %else %do;                "One Sided Test: H0: P(Group '&_grp2'-'&_grp1'>0)>=1/2 vs"
                                    / +5   "                H1: P(Group '&_grp2'-'&_grp1'>0)< 1/2;"; %end;
      put / +10 "The Fligner-Policello Statistics = " _uhead;
      put /  +10 "%sysevalf((1-&alpha)*100)% confidence bound" @44 "= (-Inf, " _ucl +(-1) ")";
      format _ucl 10.6; keep _ucl; label _ucl="Upper Confidence Bound";
      %end;
   %end;
   put / +10 "P-value" @44 "= " _pvalue;
   format _uhead _pvalue 10.6; keep _uhead _pvalue;
   label _uhead="Fligner-Policello Statistics" _pvalue="P-value";
run;
%if (%quote(&outdata) ne) %then %do;
   data &outdata;
      set _tmp2out;
   run;
%end;
%else %let syslast=&_tmplast_;
proc datasets library=work nolist;
     delete _ties1 _tmp1 _tmpr1 _grptmp1 - _grptmp%trim(%left(&_grpn))
            _tmp1out _tmp1out1 - _tmp1out%trim(%left(&_grpn));
run;quit;
%finish:
%mend Flp2mean;